4 Laboratory sample processing

4.1 DNA extraction

4.1.1 General statistics

read_tsv("data/extraction.tsv") %>%
   summarise(
    max= max(extraction_total, na.rm = TRUE),
    min= min(extraction_total, na.rm = TRUE),
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)
  ) %>%
  tt()
tinytable_eymmxi0dfllbebp6wbu0
max min mean sd
7110 0 365.683 677.4012

4.1.2 Sample types

4.1.2.1 Data distribution

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab"))  %>%
  ggplot(aes(x=sample_type, y= extraction_total, fill=sample_type, color=sample_type)) + 
    ylim(0, 2000) +
    geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
    stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
    scale_color_manual(values = c("#bdca50", "#AA3377")) +   
    scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
    labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
    theme_classic()

ggsave("figures/extraction_type.pdf",width=6, height=4, units="in")

4.1.2.2 Comparison

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
  mutate(extraction_total = log(extraction_total)) %>% 
  select(extraction_total,sample_type) %>%
  mutate(sample_type = factor(sample_type)) %>%
  wilcox.test(extraction_total ~ sample_type,data=.) %>%
  tidy()
# A tibble: 1 × 4
  statistic  p.value method                                            alternative
      <dbl>    <dbl> <chr>                                             <chr>      
1    335473 3.41e-41 Wilcoxon rank sum test with continuity correction two.sided  
left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  group_by(sample_type)  %>%
  summarise(
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)) %>%
  tt()
tinytable_imdxymok3nxt1haqrlgd
sample_type mean sd
Anal/cloacal swab 170.2285 362.7389
Faecal 316.6666 423.6434

4.1.3 Taxonomy

4.1.3.1 Data distribution

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab"))  %>%
  mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
  ggplot(aes(y=extraction_total,x=tax_group,color=tax_group,fill=tax_group)) + 
    ylim(0, 2000) +
    geom_jitter(position = position_jitter(width = 0.2), alpha = 0.5, size=0.5) +
    stat_halfeye(adjust = 0.5,width = 0.5, .width = 0, justification = -.55,normalize = "groups") +
    scale_color_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
    scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
    labs(y="DNA yield",x="Taxonomic group", color="Sample type") +
    theme_classic()

ggsave("figures/extraction_taxa.pdf",width=9, height=4, units="in")

4.1.3.2 Comparison

left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
  mutate(extraction_total = log(extraction_total)) %>% 
  select(extraction_total,tax_group) %>%
  mutate(sample_type = factor(tax_group)) %>%
  kruskal.test(extraction_total ~ tax_group,data=.)

    Kruskal-Wallis rank sum test

data:  extraction_total by tax_group
Kruskal-Wallis chi-squared = 946.6, df = 4, p-value < 2.2e-16
left_join(read_tsv("data/extraction.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  group_by(tax_group)  %>%
  summarise(
    mean= mean(extraction_total, na.rm = TRUE),
    sd = sd(extraction_total, na.rm = TRUE)) %>%
  tt()
tinytable_hnk4655mxlsc8ksw260i
tax_group mean sd
Amphibians 233.35078 358.90219
Bats 59.60574 92.00357
Birds 74.43550 262.34517
Mammals 463.93202 471.41487
Reptiles 394.50619 437.86229

4.2 Sequencing library preparation

4.2.1 Overall

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_PCR_cycles_required > 0) %>%
   summarise(
    input=mean(library_input_dna, na.rm = TRUE),
    max= max(library_PCR_cycles_required, na.rm = TRUE),
    min= min(library_PCR_cycles_required, na.rm = TRUE),
    mean= mean(library_PCR_cycles_required, na.rm = TRUE),
    sd = sd(library_PCR_cycles_required, na.rm = TRUE)) %>%
  tt()
tinytable_4656bxvjrlftbkvxho1m
input max min mean sd
89.34565 33 2 9.150046 4.390537
left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna, data = .)  %>%
  tidy()
# A tibble: 2 × 5
  term              estimate std.error statistic   p.value
  <chr>                <dbl>     <dbl>     <dbl>     <dbl>
1 (Intercept)        11.6     0.110        105.  0        
2 library_input_dna  -0.0284  0.000901     -31.6 1.11e-183
left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna * sample_type, data = .)  %>%
  anova() %>%
  tidy()
# A tibble: 4 × 6
  term                             df      sumsq     meansq  statistic    p.value
  <chr>                         <int>      <dbl>      <dbl>      <dbl>      <dbl>
1 library_input_dna                 1 14071.     14071.     1071.       4.30e-195
2 sample_type                       1  2439.      2439.      186.       8.48e- 41
3 library_input_dna:sample_type     1     0.0500     0.0500    0.00381  9.51e-  1
4 Residuals                      2441 32067.        13.1      NA       NA        
left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna * tax_group, data = .)  %>%
  anova() %>%
  tidy()
# A tibble: 4 × 6
  term                           df  sumsq  meansq statistic    p.value
  <chr>                       <int>  <dbl>   <dbl>     <dbl>      <dbl>
1 library_input_dna               1 14071. 14071.    1145.    5.17e-206
2 tax_group                       4  4140.  1035.      84.2   4.74e- 67
3 library_input_dna:tax_group     4   435.   109.       8.85  4.32e-  7
4 Residuals                    2435 29931.    12.3     NA    NA        

4.2.2 Sample types

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_input_dna <= 200) %>%
  filter(library_PCR_cycles_required > 0) %>%
    ggplot(aes(y=library_PCR_cycles_required, x=library_input_dna, color=sample_type, fill=sample_type, group=sample_type)) +
        geom_point(alpha=0.5, size=0.5) +
        stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
        scale_color_manual(values = c("#bdca50", "#AA3377")) +
        scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
        theme_classic() +
        labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type") +
        theme_classic()

ggsave("figures/cycles_type.pdf",width=6, height=3, units="in")

4.2.2.1 Comparisons

Relationship between the amount of inputted DNA (ng) and the required number of cycles.

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_PCR_cycles_required>0 & library_input_dna>0) %>% 
  lme(library_PCR_cycles_required ~ library_input_dna, random=~1 | sample_type, data = .) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_3t6aesxdiml6ykppkqdm
effect group term estimate std.error df statistic p.value
fixed NA (Intercept) 11.87417166 1.1716203608 2330 10.13483 1.187767e-23
fixed NA library_input_dna -0.02501844 0.0008256453 2330 -30.30168 2.507418e-170
ran_pars sample_type sd_(Intercept) 1.64945139 NA NA NA NA
ran_pars Residual sd_Observation 3.30432885 NA NA NA NA

Comparison between slopes.

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  lm(library_PCR_cycles_required ~ library_input_dna * sample_type, data = .)  %>%
  broom::tidy() %>%
  tt()
tinytable_xvs656yvg30yumm1bgzi
term estimate std.error statistic p.value
(Intercept) 13.3047246163 0.187031107 71.13642643 0.000000e+00
library_input_dna -0.0260524410 0.001812871 -14.37082274 5.140847e-45
sample_typeFaecal -2.4762116244 0.227612093 -10.87908638 5.960119e-27
library_input_dna:sample_typeFaecal 0.0001282468 0.002078874 0.06169052 9.508143e-01

4.2.3 Taxonomy

left_join(read_tsv("data/library.tsv"),
          read_tsv("data/sample.tsv"),
          by="sample_id") %>% 
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  filter(library_input_dna <= 200) %>%
  filter(library_PCR_cycles_required > 0) %>%
  mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
    ggplot(., aes(y=library_PCR_cycles_required, x=library_input_dna, color=tax_group, group=tax_group)) +
        geom_point(alpha=0.5, size=0.5) +
        stat_smooth(aes(fill = tax_group), method = "lm", formula = y ~ x, geom = "smooth") +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
        theme_classic() +
        labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type", fill="Sample type") +
        theme_classic()

ggsave("figures/cycles_taxa.pdf",width=6, height=3, units="in")

4.3 Data quality

left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
  left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
  left_join(read_tsv("data/index.tsv"),by="index_id") %>%
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
  mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
  #Plot map  EE6677 < bats
  ggplot(aes(y=lowqual_perc_bases, x=index_PCR_cycles_given, colour=sample_type, fill=sample_type, group=sample_type)) +
    geom_jitter(alpha=0.3) +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth", alpha=0.2) +
    scale_color_manual(values = c("#bdca50", "#AA3377")) +   
    scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
    scale_x_log10() +   
    theme_classic()

left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
  left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
  left_join(read_tsv("data/index.tsv"),by="index_id") %>%
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
  mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
  mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
  ggplot(aes(y=lowqual_perc_bases, x=index_PCR_cycles_given, colour=tax_group, fill=tax_group, group=tax_group)) +
    geom_point(alpha=0.3) +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth", alpha=0.2) +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
    scale_x_log10() +   
    theme_classic() +
    theme(legend.position = "bottom")

4.4 Data duplication

left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
  left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
  left_join(read_tsv("data/index.tsv"),by="index_id") %>%
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
  mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
  #Plot map  EE6677 < bats
  ggplot(aes(y=host_duplicate_fraction, x=index_PCR_cycles_given, colour=sample_type, fill=sample_type, group=sample_type)) +
    geom_jitter(alpha=0.3) +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth", alpha=0.2) +
    scale_color_manual(values = c("#bdca50", "#AA3377")) +   
    scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
    scale_x_log10() +   
    theme_classic()

left_join(read_tsv("data/preprocessing.tsv"),read_tsv("data/sample.tsv"),by="sample_id") %>%
  left_join(read_tsv("data/sequence.tsv"),by="sequence_id") %>%
  left_join(read_tsv("data/index.tsv"),by="index_id") %>%
  filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
  mutate(lowqual_perc_reads=(1-reads_post_fastp/reads_pre_fastp)*100) %>%
  mutate(lowqual_perc_bases=(1-bases_post_fastp/bases_pre_fastp)*100) %>%
  mutate(tax_group=factor(tax_group,levels=c("Amphibians","Reptiles","Birds","Bats","Mammals"))) %>% 
  ggplot(aes(y=host_duplicate_fraction, x=index_PCR_cycles_given, colour=tax_group, fill=tax_group, group=tax_group)) +
    geom_point(alpha=0.3) +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth", alpha=0.2) +
        scale_color_manual(values = c("#228833","#EE6677","#CCBB44","#66CCEE","#4477AA")) +
        scale_fill_manual(values = c("#22883380","#EE667780","#CCBB4480","#66CCEE80","#4477AA80")) +
    scale_x_log10() +   
    theme_classic() +
    theme(legend.position = "bottom")